import sys


def deduplicate_by_field(input_file: str, index: int) -> None:
    """Keep only the first line seen for each value of one '|'-separated field.

    Reads ``input_file`` as UTF-8 text, skips lines that are empty or
    whitespace-only, and splits every remaining line (after stripping
    surrounding whitespace) on ``'|'``. The field at the 1-based position
    ``index`` is the de-duplication key: the first line carrying a given key
    is kept, later lines with the same key are dropped. The kept lines are
    written unchanged, in their original order, to ``input_file + "out"``.

    Args:
        input_file: Path of the text file to read; the output path is this
            string with ``"out"`` appended.
        index: 1-based position of the key field within each line.

    Returns:
        None. If ``index`` is smaller than 1, or larger than the number of
        fields on some non-blank line, a message is printed and the function
        returns without writing the output file.
    """
    # Reject non-positive positions up front: index - 1 would otherwise become
    # a negative subscript and silently pick a field counted from the end.
    if index < 1:
        print("下标超出范围")
        return

    first_seen = {}
    with open(input_file, 'r', encoding='utf-8') as f:
        for line in f:
            stripped = line.strip()
            if not stripped:
                continue
            fields = stripped.split('|')
            if index > len(fields):
                print("下标超出范围")
                return
            # setdefault only stores the line when the key is new, so the
            # first occurrence wins. The original line (with its newline) is
            # stored, not the stripped copy.
            first_seen.setdefault(fields[index - 1], line)

    with open(input_file + "out", 'w', encoding='utf-8') as outfile:
        # Written in original order (dict order is insertion order).
        outfile.writelines(first_seen.values())

# Path of the file that main() passes to deduplicate_by_field.
# NOTE(review): this name shadows the builtin dir(); it is kept because main()
# reads it by this name.
# NOTE(review): the path already ends in "out", the suffix deduplicate_by_field
# appends to build its output path — presumably this points at the result of an
# earlier run; confirm that is the intended input.
dir = r"C:\Users\sherry\Desktop\ODMS测试数据\test.txtout"

def main():
    """De-duplicate the file at the module-level path ``dir``.

    The second '|'-separated field of each line is used as the key.
    """
    deduplicate_by_field(dir, 2)


# Run main() only when this file is executed as a script, not when imported.
if __name__ == '__main__':
    main()






